# https://satijalab.org/signac/articles/pbmc_multiomic.html
counts <- Read10X_h5("filtered_feature_bc_matrix.h5")
fragpath <- "atac_fragments.tsv.gz"
# get gene annotations for hg38

annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
genome(annotation) <- "hg38"
seqlevelsStyle(annotation) <- "UCSC"


# seqlevelsStyle(annotation) <- "Ensembl"

# create a Seurat object containing the RNA adata

npm1 <- CreateSeuratObject(
        counts = counts$`Gene Expression`,
        assay = "RNA"
)

# npm1[["percent.mt"]] <- PercentageFeatureSet(npm1, pattern = "^MT-")
# 
# # create ATAC assay and add it to the object
# 
# npm1[["ATAC"]] <- CreateChromatinAssay(
#         counts = counts$Peaks,
#         sep = c(":", "-"),
#         fragments = fragpath,
#         annotation = annotation
# 
# )
DefaultAssay(npm1) <- "RNA"


VlnPlot(
  object = npm1,
  features = c("nCount_RNA"),
  ncol = 4,
  pt.size = 0
)

npm1 <- subset(
  x = npm1,
  subset = nCount_RNA < 7500
)

VlnPlot(
  object = npm1,
  features = c("nCount_RNA"),
  ncol = 4,
  pt.size = 0
)

#npm1 <- SCTransform(npm1) %>% RunPCA() %>% RunUMAP(dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
npm1 <- SCTransform(npm1)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%
npm1 <- RunPCA(npm1)
#Add cell labels from the previously generated scRNA annotation
#reference <- readRDS("~/Documents/Research/NPM1-AML/NPM1_seurat/NPM1_seurat.rds")
reference <- readRDS("~/Downloads/namlab/NPM1_seurat/NPM1_seurat.rds")

npm1 <- AddMetaData(
        object = npm1,
        metadata = reference@meta.data %>% select(Cell.Ident_Mutation.Status) %>% filter(rownames(.) %in% BiocGenerics::intersect(rownames(npm1@meta.data), rownames(reference@meta.data))))

Idents(npm1) <- "Cell.Ident_Mutation.Status"
DefaultAssay(npm1) <- "SCT"
stem_cell_markers_1 <- FindMarkers(npm1, ident.1 = "HSC1_WT", ident.2 = "HSC2_MUT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))
#stem_cell_markers_2 <- FindMarkers(npm1, ident.1 = "HSC2_MUT", ident.2 = "HSC1_MUT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))
#monocytic_markers <- FindMarkers(npm1, ident.1 = "Monocytic_MUT", ident.2 = "Monocytic_WT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))

GSEA from g:Profiler. List of genes was mapped to GO terms.

gostres <- gost(query = rownames(stem_cell_markers_1), organism = "hsapiens")
gostplot(gostres, capped = FALSE, interactive = TRUE)

Prepare input

original_gene_list <- stem_cell_markers_1$avg_log2FC
names(original_gene_list) <- rownames(stem_cell_markers_1)
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)

Volcano plot

#de_mono = monocytic_markers
de_hsc = stem_cell_markers_1
# de_hsc = monocytic_markers

Name2 = "HSC1_WT vs. HSC2_MUT"

de_hsc$threshold <- as.factor(ifelse(de_hsc$p_val_adj < 0.05 & abs(de_hsc$avg_log2FC) >= 0.25, ifelse(de_hsc$avg_log2FC> 0.25 ,'Up','Down'),'NoSignifi'))

ggplot(data=de_hsc, aes(x=avg_log2FC, y=-log10(p_val_adj), colour=threshold)) +
geom_point(alpha=1, size=1.5) +
scale_color_manual(values=c("green", "red", "grey")) +
xlim(c(-4.5, 4.5)) +
geom_vline(xintercept=c(-.25, .25), lty=4,col="black",lwd=0.8) +
geom_hline(yintercept=-log10(0.05), lty=4,col="black",lwd=0.8) +
annotate("text", x=c(-1.2, 1.2), y=1.8, label=c("-1", "1")) +
annotate("text", x=-4, y=1.8, label="-log10(0.05)") +
labs(x="log2(fold change)", y="-log10 (p-value)", title=Name2) +
theme(plot.title=element_text(hjust=0.5), legend.position="right", legend.title=element_blank(
))

GO.title <- paste(Name2,"GO", collapse = " ")
KEGG.title <- paste(Name2,"KEGG", collapse = " ")

Convert gene symbols to ENTREZ IDs and add to dataframe.

symbols = rownames(de_hsc)
entrez = mapIds(org.Hs.eg.db, symbols, 'ENTREZID', 'SYMBOL')
de_hsc$entrezgene_id = entrez

Remove genes with NA IDs and sort the resulting vector.

# logFC <- de_hsc$avg_log2FC
# names(logFC) <- de_hsc$entrezgene_id
# logFC <- logFC[na.exclude(names(logFC))]
# logFC <- sort(logFC, decreasing = T)

GSEA

gse <- gseGO(geneList=gene_list, 
             ont ="ALL", 
             keyType = "SYMBOL", 
             minGSSize = 3, 
             maxGSSize = 800, 
             pvalueCutoff = 0.05, 
             verbose = TRUE, 
             OrgDb = org.Hs.eg.db, 
             pAdjustMethod = "none")

Dot plot

require(DOSE)
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)

Enrichment map. Enriched GO terms are organized into a network with edges connecting overlapping gene sets.

x2 <- pairwise_termsim(gse)
emapplot(x2, showCategory = 10)

Network of links between genes and GO terms. Top five GO terms are shown.

cnetplot(gse, categorySize="pvalue", foldChange=gene_list, showCategory = 5)

Ridge plot (frequency of fold values per gene within each set)

ridgeplot(gse) + labs(x = "enrichment distribution")

GSEA plot

#gseaplot(gse, by = "all", title = gse$Description[3], geneSetID = 3)
gseaplot2(gse, geneSetID=1:10, title = GO.title)

KEGG GSEA

ids<-bitr(names(original_gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=org.Hs.eg.db)

dedup_ids = ids[!duplicated(ids[c("SYMBOL")]),]

df2 = stem_cell_markers_1[rownames(stem_cell_markers_1) %in% dedup_ids$SYMBOL,]

df2$Y = dedup_ids$ENTREZID
kegg_gene_list <- df2$avg_log2FC

names(kegg_gene_list) <- df2$Y

kegg_gene_list<-na.omit(kegg_gene_list)

kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)

Create gseKEGG object

kegg_organism = "hsa"
kk2 <- gseKEGG(geneList     = kegg_gene_list,
               organism     = kegg_organism,
               minGSSize    = 3,
               maxGSSize    = 800,
               pvalueCutoff = 0.05,
               pAdjustMethod = "none",
               keyType       = "ncbi-geneid")

Dot plot

dotplot(kk2, showCategory = 10, title = "Enriched Pathways" , split=".sign") + facet_grid(.~.sign)

Enrichment map

x3 <- pairwise_termsim(kk2)
emapplot(x3)

cnetplot(kk2, categorySize="pvalue", foldChange=gene_list)

ridgeplot(kk2) + labs(x = "enrichment distribution")

GSEA plot (KEGG)

#gseaplot(kk2, by = "all", title = kk2$Description[1], geneSetID = 1)
#gseaplot2(kk2, geneSetID=1:10, title = KEGG.title)
gseaplot2(kk2, geneSetID=1:5, title = KEGG.title)

hsa_path <- pathview(gene.data=kegg_gene_list, pathway.id="hsa05222", species = kegg_organism)
knitr::include_graphics("hsa05222.pathview.png")

knitr::include_graphics("hsa05222.png")

ORA

Create enrichGO object

#geneName2 <- de_hsc$entrezgene_id[abs(de_hsc$avg_log2FC) > 0.25 & de_hsc$p_val_adj<0.05]
geneName2 <- de_hsc$entrezgene_id#[de_hsc$p_val_adj<0.05]
geneName2 <- na.exclude(geneName2)
# de_hsc$logFC = de_hsc$avg_log2FC
# de_hsc$adj.P.Val = de_hsc$p_val_adj

df = as.data.frame(org.Hs.egGO)
go_gene_list = unique(sort(df$gene_id))

length(geneName2)
## [1] 399
go_enrich <- enrichGO(gene    = geneName2,
                universe      = go_gene_list,
                OrgDb         = org.Hs.eg.db,
                ont           = "BP",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
                readable      = TRUE)

#head(ego2)
#length(ego2$ID)

dotplot(go_enrich, showCategory=15) + ggtitle("GO enrichment")

Upset plot: emphasize genes overlapping among different gene sets.

upsetplot(go_enrich)

wcdf<-read.table(text=go_enrich$GeneRatio, sep = "/")[1]
wcdf$term<-go_enrich[,2]
wordcloud(words = wcdf$term, freq = wcdf$V1, scale=(c(1, .05)), colors=brewer.pal(8, "Dark2"), max.words = 25)

barplot(go_enrich, 
        drop = TRUE, 
        showCategory = 10, 
        title = "GO Biological Pathways",
        font.size = 8)

dotplot(go_enrich)
## wrong orderBy parameter; set to default `orderBy = "x"`

Enrichment plot

x4 <- pairwise_termsim(go_enrich)
emapplot(x4)

goplot(go_enrich, showCategory = 10)
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cnetplot(go_enrich, categorySize="pvalue", foldChange=gene_list)

KEGG pathway enrichment

ids<-bitr(names(original_gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(names(original_gene_list), fromType = "SYMBOL", toType =
## "ENTREZID", : 6.34% of input gene IDs are fail to map...
dedup_ids = ids[!duplicated(ids[c("SYMBOL")]),]

df2 = stem_cell_markers_1[rownames(stem_cell_markers_1) %in% dedup_ids$SYMBOL,]

df2$Y = dedup_ids$ENTREZID
kegg_gene_list <- df2$avg_log2FC

names(kegg_gene_list) <- df2$Y

kegg_gene_list<-na.omit(kegg_gene_list)

kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)

kegg_sig_genes_df = subset(df2, p_val < 0.05)

kegg_genes <- kegg_sig_genes_df$avg_log2FC

#names(kegg_genes) <- kegg_sig_genes_df2$Y
names(kegg_genes) <- rownames(kegg_sig_genes_df)

kegg_genes <- na.omit(kegg_genes)